{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 14 Standard deviation of residuals or root mean square deviation (RMSD)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%html\n",
""
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from pandas import Series, DataFrame\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy import stats\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Source: [Khan Academy: Standard deviation of residuals or root mean square deviation (RMSD)](https://www.khanacademy.org/math/ap-statistics/bivariate-data-ap/assessing-fit-least-squares-regression/v/standard-dev-residuals?modal=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Toy data set: four (x, y) observations.\n",
"hours = [1, 2, 2, 3]\n",
"scores = [1, 2, 3, 6]\n",
"x = np.array(hours)\n",
"y = np.array(scores)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Ordinary least-squares fit of y on x; `stderr` is the standard error of the slope.\n",
"fit = stats.linregress(x, y)\n",
"slope, intercept, rvalue, pvalue, stderr = fit"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Fitted value on the regression line for every observation in x.\n",
"yhat = slope * x + intercept"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"# Turn both 1-D arrays into (n, 1) columns, then fit and predict on the same points.\n",
"x_test = x[:, np.newaxis]\n",
"y_test = y[:, np.newaxis]\n",
"reg = LinearRegression().fit(x_test, y_test)\n",
"y_pred = reg.predict(x_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Tabulate the observations, the fitted values and the squared residuals.\n",
"columns = {'x': x, 'y': y, 'yhat': yhat}\n",
"columns['(y - yhat)**2'] = (y - yhat) ** 2\n",
"df = DataFrame(columns)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" x | \n",
" y | \n",
" yhat | \n",
" (y - yhat)**2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 0.5 | \n",
" 0.25 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 3.0 | \n",
" 1.00 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 3 | \n",
" 3.0 | \n",
" 0.00 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 6 | \n",
" 5.5 | \n",
" 0.25 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" x y yhat (y - yhat)**2\n",
"0 1 1 0.5 0.25\n",
"1 2 2 3.0 1.00\n",
"2 2 3 3.0 0.00\n",
"3 3 6 5.5 0.25"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show observed y, fitted yhat and the squared residual for each observation.\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4,)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# y is one-dimensional, so len(y) is the number of observations n.\n",
"y.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
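"The standard deviation of the residuals of a simple linear regression divides by $n-2$ because two parameters, the slope and the intercept, are estimated from the $n$ observations:\n",
"\n",
"$$ \\sqrt{ \\frac{ \\sum (\\text{residual})^{2} }{n-2} } $$\n",
"\n",
"$$ \\sqrt { \\frac{ \\sum (y-\\hat{y})^{2} }{n-2} } $$"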
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
"def rmsd(y, yhat, i=0):\n",
"    \"\"\"Return the root mean square deviation of the residuals ``y - yhat``.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    y : array-like\n",
"        Observed values.\n",
"    yhat : array-like or scalar\n",
"        Fitted values with the same shape as ``y``; a scalar is broadcast.\n",
"    i : int, default 0\n",
"        Amount subtracted from ``n = len(y)`` in the denominator, so ``i=0``\n",
"        divides by ``n`` and ``i=2`` divides by ``n - 2``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        ``sqrt(sum((y - yhat) ** 2) / (n - i))``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``y - yhat`` does not keep the shape of ``y`` or if ``n - i <= 0``.\n",
"    \"\"\"\n",
"    y = np.asarray(y, dtype=float)\n",
"    yhat = np.asarray(yhat, dtype=float)\n",
"    residuals = y - yhat\n",
"    # Shapes such as (n,) and (n, 1) broadcast to (n, n) and would inflate the sum.\n",
"    if residuals.shape != y.shape:\n",
"        raise ValueError(\n",
"            f'y has shape {y.shape} but y - yhat has shape {residuals.shape}'\n",
"        )\n",
"    n = len(y)\n",
"    if n - i <= 0:\n",
"        raise ValueError(f'need more than {i} observations, got {n}')\n",
"    return np.sqrt(np.sum(residuals ** 2) / (n - i))"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"divided by n - 1: \t 0.7071067811865476\n",
"divided by n - 2: \t 0.8660254037844386\n",
"divided by n - 0: \t 0.6123724356957945\n"
]
}
],
"source": [
"# Each label shows the denominator n - i that rmsd uses for that call.\n",
"for ddof in (1, 2, 0):\n",
"    print(f'divided by n - {ddof}: \\t', rmsd(y, yhat, ddof))"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"0.6123724356957947"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# linregress's stderr is the standard error of the slope, not the RMSD.\n",
"# It equals the divide-by-n RMSD here only because (n - 2) * sum((x - x.mean())**2) == n for this data.\n",
"stderr"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.6123724356957945"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `squared=False` is deprecated in scikit-learn 1.4 and removed in 1.6;\n",
"# the square root of the MSE is the same RMSE on every version.\n",
"np.sqrt(mean_squared_error(y_test, y_pred))"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Score')"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# seaborn >= 0.12 accepts x and y only as keyword arguments.\n",
"sns.scatterplot(x=x, y=y, color='k')\n",
"sns.lineplot(x=x, y=yhat)\n",
"plt.xlabel('Hours Studying')\n",
"plt.ylabel('Score')"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Score')"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Observed points in black with the scikit-learn fitted line on one Axes.\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(x_test, y_test, color='k')\n",
"ax.plot(x_test, y_pred)\n",
"ax.set_xlabel('Hours Studying')\n",
"ax.set_ylabel('Score')"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 4
}